/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */




/**
   @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
*/

package jp.ac.tohoku.ecei.cl.www.base;

import java.util.ArrayList;
import java.io.*;
import java.util.Iterator;
import java.util.HashMap;
import java.rmi.dgc.VMID;

//import gnu.trove.list.array.*;
import gnu.trove.map.hash.*;
//import gnu.trove.set.hash.*;
/**
 * A mapping between integers and objects where the mapping in each
 * direction is efficient.  Integers are assigned consecutively, starting
 * at zero, as objects are added to the Alphabet.  Objects cannot be
 * deleted from the Alphabet and thus the integers are never reused.
 * <p>
 * The most common use of an alphabet is as a dictionary of feature names
 * associated with a {@link edu.umass.cs.mallet.base.types.FeatureVector} in an
 * {@link edu.umass.cs.mallet.base.types.Instance}. In a simple document
 * classification usage,
 * each unique word in a document would be a unique entry in the Alphabet
 * with a unique integer associated with it.   FeatureVectors rely on
 * the integer part of the mapping to efficiently represent the subset of
 * the Alphabet present in the FeatureVector.
 * <p>
 * All entries must be of exactly the same class; the class is either given
 * to the constructor or fixed by the first object passed to
 * {@link #lookupIndex(Object, boolean)}.
 * <p>
 * Insertions are serialized on the Alphabet instance, but plain lookups are
 * not synchronized, so an Alphabet that is still growing should not be
 * shared between threads without external locking.
 * <p>
 * Every Alphabet registers itself in a static table keyed by its
 * {@link VMID}, so that deserializing the same Alphabet more than once in
 * one JVM resolves to a single shared instance (see {@link #readResolve()}).
 * As a consequence, instances stay referenced by that table for the lifetime
 * of the class.
 *
 * @see edu.umass.cs.mallet.base.types.FeatureVector
 * @see edu.umass.cs.mallet.base.types.Instance
 * @see edu.umass.cs.mallet.base.pipe.Pipe
 */
public class Alphabet implements Serializable
{
  /** Maps each entry to its integer index. */
  TObjectIntHashMap map;
  /** Holds the entries in index order; position i holds the entry with index i. */
  ArrayList entries;
  /** When true, lookups never add new entries. */
  boolean growthStopped = false;
  /** The class every entry must have, or null while it is still undetermined. */
  Class entryClass = null;
  VMID instanceId = new VMID();  // used in readResolve to identify persistent instances

  /**
   * Creates an empty Alphabet.
   *
   * @param capacity   initial capacity of the underlying map and list
   * @param entryClass the class all entries must have, or null to take the
   *                   class of the first object that is looked up
   */
  public Alphabet (int capacity, Class entryClass)
  {
    this.map = new TObjectIntHashMap (capacity);
    this.entries = new ArrayList (capacity);
    this.entryClass = entryClass;
    // someone could try to deserialize us into this image (e.g., by RMI).  Handle this.
    deserializedEntries.put (instanceId, this);
  }

  /** Creates an empty Alphabet with an initial capacity of 8 and the given entry class. */
  public Alphabet (Class entryClass)
  {
    this (8, entryClass);
  }

  /** Creates an empty Alphabet with the given initial capacity and no fixed entry class. */
  public Alphabet (int capacity)
  {
    this (capacity, null);
  }

  /** Creates an empty Alphabet with an initial capacity of 8 and no fixed entry class. */
  public Alphabet ()
  {
    this (8, null);
  }

  /**
   * Returns a copy of this Alphabet with its own map and entry list (the
   * entry objects themselves are shared).  The copy is built with the
   * no-argument constructor, so it receives a fresh instance id rather
   * than the id of this Alphabet.
   */
  @Override
  public Object clone ()
  {
    // Wastes effort, because we over-write ivars we create
    Alphabet ret = new Alphabet ();
    ret.map = new TObjectIntHashMap (map);
    ret.entries = (ArrayList) entries.clone();
    ret.growthStopped = growthStopped;
    ret.entryClass = entryClass;
    return ret;
  }

  /**
   * Returns the index of <tt>entry</tt>, optionally adding it first.
   * <p>
   * If no entry class has been fixed yet, the class of <tt>entry</tt>
   * becomes the entry class of this Alphabet, even when the entry itself
   * ends up not being added.
   *
   * @param entry           the object to look up; must not be null
   * @param addIfNotPresent whether to add <tt>entry</tt> when it is missing
   *                        (ignored while growth is stopped)
   * @return the index of <tt>entry</tt>, or -1 if it is not present and
   *         was not added
   * @throws IllegalArgumentException if <tt>entry</tt> is null or its class
   *         differs from the entry class of this Alphabet
   */
  public int lookupIndex (Object entry, boolean addIfNotPresent)
  {
    if (entry == null)
      throw new IllegalArgumentException ("Can't lookup \"null\" in an Alphabet.");
    if (entryClass == null)
      entryClass = entry.getClass();
    else {
      // Insist that all entries in the Alphabet are of the same
      // class.  This may not be strictly necessary, but will catch a
      // bunch of easily-made errors.
      if (entry.getClass() != entryClass) {
        throw new IllegalArgumentException ("Non-matching entry class, "+entry.getClass()+", was "+entryClass);
      }
    }
    int retIndex = -1;
    if (map.containsKey (entry)) {
      retIndex = map.get (entry);
    }
    else if (!growthStopped && addIfNotPresent) {
      synchronized (this) {
        // Re-check under the lock: another thread may have added the same
        // entry between the unsynchronized test above and acquiring the
        // lock, and adding it again would create a duplicate entry.
        if (map.containsKey (entry)) {
          retIndex = map.get (entry);
        }
        else {
          retIndex = entries.size();
          map.put (entry, retIndex);
          entries.add (entry);
        }
      }
    }
    return retIndex;
  }

  /** Returns the index of <tt>entry</tt>, adding it if it is missing and growth is not stopped. */
  public int lookupIndex (Object entry)
  {
    return lookupIndex (entry, true);
  }

  /**
   * Returns the entry stored at <tt>index</tt>.
   *
   * @throws IndexOutOfBoundsException if <tt>index</tt> is not a valid index
   */
  public Object lookupObject (int index)
  {
    return entries.get(index);
  }

  /** Returns a new array containing all entries in index order. */
  public Object[] toArray () {
    return entries.toArray();
  }

  /**
   * Returns an array containing all the entries in the Alphabet.
   *  The runtime type of the returned array is the runtime type of in.
   *  If in is large enough to hold everything in the alphabet, then it
   *  is used.  The returned array is such that for all entries <tt>obj</tt>,
   *  <tt>ret[lookupIndex(obj)] = obj</tt> .
   */
  public Object[] toArray (Object[] in) {
    return entries.toArray (in);
  }

  /**
   * Returns an iterator over the entries in index order.  The iterator is
   * read-only: its <tt>remove</tt> method throws
   * {@link UnsupportedOperationException}, because removing from the entry
   * list alone would leave the index map inconsistent.
   */
  public Iterator iterator () {
    return java.util.Collections.unmodifiableList (entries).iterator();
  }

  /**
   * Returns a new array holding the entries for the given indices, in the
   * same order as <tt>indices</tt>.
   */
  public Object[] lookupObjects (int[] indices)
  {
    Object[] ret = new Object[indices.length];
    for (int i = 0; i < indices.length; i++)
      ret[i] = entries.get(indices[i]);
    return ret;
  }

  /**
   * Stores the objects corresponding to the given indices into <tt>buf</tt>.
   * @param indices An array of indices to look up
   * @param buf An array to store the returned objects in; it must have at
   *            least <tt>indices.length</tt> elements.
   * @return <tt>buf</tt> itself, with its first <tt>indices.length</tt>
   *         positions filled with values from this Alphabet.
   */
  public Object[] lookupObjects (int[] indices, Object[] buf)
  {
    for (int i = 0; i < indices.length; i++)
      buf[i] = entries.get(indices[i]);
    return buf;
  }

  /**
   * Looks up every object with {@link #lookupIndex(Object, boolean)} and
   * returns the resulting indices in the same order.
   */
  public int[] lookupIndices (Object[] objects, boolean addIfNotPresent)
  {
    int[] ret = new int[objects.length];
    for (int i = 0; i < objects.length; i++)
      ret[i] = lookupIndex (objects[i], addIfNotPresent);
    return ret;
  }

  /** Returns true if <tt>entry</tt> is a key of the index map. */
  public boolean contains (Object entry)
  {
    return map.contains (entry);
  }

  /** Returns the number of entries. */
  public int size ()
  {
    return entries.size();
  }

  /** Stops lookups from adding new entries. */
  public void stopGrowth ()
  {
    growthStopped = true;
  }

  /** Allows lookups to add new entries again. */
  public void startGrowth ()
  {
    growthStopped = false;
  }

  /** Returns true if lookups are currently prevented from adding entries. */
  public boolean growthStopped ()
  {
    return growthStopped;
  }

  /** Returns the class all entries must have, or null if it is not fixed yet. */
  public Class entryClass ()
  {
    return entryClass;
  }

  /** Return String representation of all Alphabet entries, each
      followed by a newline. */
  @Override
  public String toString()
  {
    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < entries.size(); i++) {
      sb.append (entries.get(i).toString());
      sb.append ('\n');
    }
    return sb.toString();
  }

  /** Writes all entries to <tt>System.out</tt>; see {@link #dump(PrintWriter)}. */
  public void dump () { dump (System.out); }

  /**
   * Writes all entries to <tt>out</tt> through an auto-flushing PrintWriter
   * that uses the platform default charset; see {@link #dump(PrintWriter)}.
   */
  public void dump (PrintStream out)
  {
    dump (new PrintWriter (new OutputStreamWriter (out), true));
  }

  /** Writes one line of the form <tt>index =&gt; entry</tt> for every entry. */
  public void dump (PrintWriter out)
  {
    for (int i = 0; i < entries.size(); i++) {
      out.println (i+" => "+entries.get (i));
    }
  }

  public VMID getInstanceId() { return instanceId;} // for debugging
  /** Replaces the instance id; the static registry of instances is not updated. */
  public void setInstanceId(VMID id) { this.instanceId = id; }

  // Serialization

  private static final long serialVersionUID = 1;
  private static final int CURRENT_SERIAL_VERSION = 1;

  /**
   * Writes the serial version, the entries in index order, the growth flag,
   * the entry class and the instance id.  The index map is not written; it
   * is rebuilt from the entry order in {@link #readObject}.
   */
  private void writeObject (ObjectOutputStream out) throws IOException {
    out.writeInt (CURRENT_SERIAL_VERSION);
    out.writeInt (entries.size());
    for (int i = 0; i < entries.size(); i++)
      out.writeObject (entries.get(i));
    out.writeBoolean (growthStopped);
    out.writeObject (entryClass);
    out.writeObject(instanceId);
  }

  /**
   * Reads the state written by {@link #writeObject} and rebuilds the index
   * map by assigning each entry its position in the stream.
   */
  private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
    int version = in.readInt ();
    int size = in.readInt();
    entries = new ArrayList (size);
    map = new TObjectIntHashMap (size);
    for (int i = 0; i < size; i++) {
      Object o = in.readObject();
      map.put (o, i);
      entries.add (o);
    }
    growthStopped = in.readBoolean();
    entryClass = (Class) in.readObject();
    if (version > 0) { // instance id added in version 1
      instanceId = (VMID) in.readObject();
    }
  }

  /** Registry of live Alphabets by instance id, consulted by readResolve. */
  private static final HashMap deserializedEntries = new HashMap();

  /**
   * This gets called after readObject; it lets the object decide whether
   * to return itself or return a previously read in version.
   * We use a hashMap of instanceIds to determine if we have already read
   * in this object.
   * @return the Alphabet already registered under this instance id if there
   *         is one; otherwise this object, which is then registered when
   *         its instance id is not null
   * @throws ObjectStreamException declared for the serialization contract;
   *         never thrown by this implementation
   */
  public Object readResolve() throws ObjectStreamException {
    Object previous = deserializedEntries.get(instanceId);
    if (previous != null){
      return previous;
    }
    if (instanceId != null){
      deserializedEntries.put(instanceId, this);
    }
    return this;
  }
}
